In [1]:
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
In [3]:
# Read the training set; the literal strings '?' and 'nan' are parsed as missing values.
df = pd.read_csv(
    'Train.csv',
    na_values=['?', 'nan'],
)

# Preview the first rows to check that the columns loaded as expected.
df.head()
Out[3]:
timestamp Soil humidity 1 Irrigation field 1 Soil humidity 2 Irrigation field 2 Soil humidity 3 Irrigation field 3 Soil humidity 4 Irrigation field 4 Air temperature (C) Air humidity (%) Pressure (KPa) Wind speed (Km/h) Wind gust (Km/h) Wind direction (Deg)
0 23-02-2019 67.92 0.0 55.72 0.0 -1.56 1.0 26.57 1.0 19.52 55.04 101.50 2.13 6.30 225.00
1 23-02-2019 67.89 0.0 55.74 0.0 -1.51 1.0 26.58 1.0 19.49 55.17 101.50 2.01 10.46 123.75
2 23-02-2019 67.86 0.0 55.77 0.0 -1.47 1.0 26.59 1.0 19.47 55.30 101.51 1.90 14.63 22.50
3 23-02-2019 67.84 0.0 55.79 0.0 -1.42 1.0 26.61 1.0 19.54 54.20 101.51 2.28 16.08 123.75
4 23-02-2019 67.81 0.0 55.82 0.0 -1.38 1.0 26.62 1.0 19.61 53.09 101.51 2.66 17.52 225.00
In [4]:
# Print each column's dtype and non-null count to see which columns have gaps.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28049 entries, 0 to 28048
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   timestamp             28049 non-null  object 
 1   Soil humidity 1       8917 non-null   float64
 2   Irrigation field 1    10066 non-null  float64
 3   Soil humidity 2       26304 non-null  float64
 4   Irrigation field 2    28029 non-null  float64
 5   Soil humidity 3       16086 non-null  float64
 6   Irrigation field 3    17236 non-null  float64
 7   Soil humidity 4       26306 non-null  float64
 8   Irrigation field 4    28029 non-null  float64
 9   Air temperature (C)   23995 non-null  float64
 10  Air humidity (%)      23995 non-null  float64
 11  Pressure (KPa)        23995 non-null  float64
 12  Wind speed (Km/h)     23995 non-null  float64
 13  Wind gust (Km/h)      23995 non-null  float64
 14  Wind direction (Deg)  23995 non-null  float64
dtypes: float64(14), object(1)
memory usage: 3.2+ MB
In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
Out[5]:
Soil humidity 1 Irrigation field 1 Soil humidity 2 Irrigation field 2 Soil humidity 3 Irrigation field 3 Soil humidity 4 Irrigation field 4 Air temperature (C) Air humidity (%) Pressure (KPa) Wind speed (Km/h) Wind gust (Km/h) Wind direction (Deg)
count 8917.000000 10066.000000 26304.000000 28029.000000 16086.000000 17236.000000 26306.000000 28029.000000 23995.000000 23995.000000 23995.000000 23995.000000 23995.000000 23995.000000
mean 63.025653 0.177727 18.767424 0.095615 25.046062 0.245243 17.512501 0.211531 24.263780 58.521052 101.131418 9.896898 41.744905 93.983796
std 9.801099 0.382302 22.903736 0.294068 19.998016 0.430243 14.247310 0.408401 6.756751 30.073448 0.218448 4.325666 24.168987 100.502500
min 36.000000 0.000000 -45.000000 0.000000 -38.000000 0.000000 -30.000000 0.000000 11.220000 0.590000 100.500000 0.000000 0.000000 0.000000
25% 58.210000 0.000000 3.850000 0.000000 10.220000 0.000000 8.000000 0.000000 18.580000 33.980000 100.980000 6.830000 21.820000 22.500000
50% 63.000000 0.000000 23.000000 0.000000 26.610000 0.000000 19.600000 0.000000 22.150000 61.310000 101.120000 9.530000 37.240000 22.500000
75% 68.000000 0.000000 35.310000 0.000000 41.360000 0.000000 29.050000 0.000000 29.590000 86.360000 101.260000 12.460000 59.275000 180.000000
max 88.000000 1.000000 59.000000 1.000000 68.000000 1.000000 47.000000 1.000000 45.560000 96.000000 101.860000 31.360000 133.330000 337.500000
In [6]:
# Count the missing entries in every column.
missing_mask = df.isna()
missing_mask.sum()
Out[6]:
timestamp                   0
Soil humidity 1         19132
Irrigation field 1      17983
Soil humidity 2          1745
Irrigation field 2         20
Soil humidity 3         11963
Irrigation field 3      10813
Soil humidity 4          1743
Irrigation field 4         20
Air temperature (C)      4054
Air humidity (%)         4054
Pressure (KPa)           4054
Wind speed (Km/h)        4054
Wind gust (Km/h)         4054
Wind direction (Deg)     4054
dtype: int64
In [7]:
# Keep the column labels as a NumPy array and print them for reference.
col_names = df.columns.to_numpy()
print(col_names)
['timestamp' 'Soil humidity 1' 'Irrigation field 1' 'Soil humidity 2'
 'Irrigation field 2' 'Soil humidity 3' 'Irrigation field 3'
 'Soil humidity 4' 'Irrigation field 4' 'Air temperature (C)'
 'Air humidity (%)' 'Pressure (KPa)' 'Wind speed (Km/h)'
 'Wind gust (Km/h)' 'Wind direction (Deg)']
In [8]:
# Scatter of soil humidity 1 against the timestamp column, coloured by air
# humidity; the irrigation and weather readings are shown on hover.
# NOTE(review): df.info() reports `timestamp` as object dtype (strings), so the
# x-axis is probably treated as categorical rather than as dates -- confirm, and
# consider parsing the column if a true time axis is wanted.
fig = px.scatter(
    df,
    x='timestamp',
    y='Soil humidity 1',
    color='Air humidity (%)',
    hover_data=[
        'Irrigation field 1',
        'Air temperature (C)',
        'Pressure (KPa)',
        'Wind speed (Km/h)',
        'Wind gust (Km/h)',
        'Wind direction (Deg)',
    ],
    # A title lets this figure be told apart from the other per-field scatters.
    title='Soil humidity 1 by timestamp (colour: air humidity)',
)
fig.show()
In [9]:
# Columns for field 1: its soil-humidity reading, its irrigation column, and
# the weather readings.
sh1_columns = [
    'Soil humidity 1',
    'Irrigation field 1',
    'Air temperature (C)',
    'Air humidity (%)',
    'Pressure (KPa)',
    'Wind speed (Km/h)',
    'Wind gust (Km/h)',
    'Wind direction (Deg)',
]
sh1_df = df[sh1_columns]

# Preview the field-1 subset.
sh1_df.head()
Out[9]:
Soil humidity 1 Irrigation field 1 Air temperature (C) Air humidity (%) Pressure (KPa) Wind speed (Km/h) Wind gust (Km/h) Wind direction (Deg)
0 67.92 0.0 19.52 55.04 101.50 2.13 6.30 225.00
1 67.89 0.0 19.49 55.17 101.50 2.01 10.46 123.75
2 67.86 0.0 19.47 55.30 101.51 1.90 14.63 22.50
3 67.84 0.0 19.54 54.20 101.51 2.28 16.08 123.75
4 67.81 0.0 19.61 53.09 101.51 2.66 17.52 225.00
In [10]:
# Pairwise correlations between all columns of the field-1 subset
# (pandas' default method, Pearson).
corr_matrix = sh1_df.corr()

# Rank every column by its correlation with soil humidity 1, largest first.
(
    corr_matrix
    .loc[:, "Soil humidity 1"]
    .sort_values(ascending=False)
)
Out[10]:
Soil humidity 1         1.000000
Irrigation field 1      0.170783
Air temperature (C)     0.127492
Wind direction (Deg)    0.037806
Pressure (KPa)          0.015354
Wind gust (Km/h)       -0.057814
Wind speed (Km/h)      -0.095873
Air humidity (%)       -0.322538
Name: Soil humidity 1, dtype: float64
In [11]:
# Scatter of soil humidity 2 against the timestamp column, coloured by air
# humidity; the irrigation and weather readings are shown on hover.
# NOTE(review): df.info() reports `timestamp` as object dtype (strings), so the
# x-axis is probably treated as categorical rather than as dates -- confirm, and
# consider parsing the column if a true time axis is wanted.
fig = px.scatter(
    df,
    x='timestamp',
    y='Soil humidity 2',
    color='Air humidity (%)',
    hover_data=[
        'Irrigation field 2',
        'Air temperature (C)',
        'Pressure (KPa)',
        'Wind speed (Km/h)',
        'Wind gust (Km/h)',
        'Wind direction (Deg)',
    ],
    # A title lets this figure be told apart from the other per-field scatters.
    title='Soil humidity 2 by timestamp (colour: air humidity)',
)
fig.show()
In [12]:
# Columns for field 2: its soil-humidity reading, its irrigation column, and
# the weather readings.
sh2_columns = [
    'Soil humidity 2',
    'Irrigation field 2',
    'Air temperature (C)',
    'Air humidity (%)',
    'Pressure (KPa)',
    'Wind speed (Km/h)',
    'Wind gust (Km/h)',
    'Wind direction (Deg)',
]
sh2_df = df[sh2_columns]

# Preview the field-2 subset.
sh2_df.head()
Out[12]:
Soil humidity 2 Irrigation field 2 Air temperature (C) Air humidity (%) Pressure (KPa) Wind speed (Km/h) Wind gust (Km/h) Wind direction (Deg)
0 55.72 0.0 19.52 55.04 101.50 2.13 6.30 225.00
1 55.74 0.0 19.49 55.17 101.50 2.01 10.46 123.75
2 55.77 0.0 19.47 55.30 101.51 1.90 14.63 22.50
3 55.79 0.0 19.54 54.20 101.51 2.28 16.08 123.75
4 55.82 0.0 19.61 53.09 101.51 2.66 17.52 225.00
In [13]:
# Pairwise correlations between all columns of the field-2 subset
# (pandas' default method, Pearson).
corr_matrix = sh2_df.corr()

# Rank every column by its correlation with soil humidity 2, largest first.
(
    corr_matrix
    .loc[:, "Soil humidity 2"]
    .sort_values(ascending=False)
)
Out[13]:
Soil humidity 2         1.000000
Pressure (KPa)          0.289770
Wind direction (Deg)    0.211908
Irrigation field 2      0.126485
Air temperature (C)    -0.005512
Wind gust (Km/h)       -0.114120
Wind speed (Km/h)      -0.172694
Air humidity (%)       -0.297771
Name: Soil humidity 2, dtype: float64
In [14]:
# Scatter of soil humidity 3 against the timestamp column, coloured by air
# humidity; the irrigation and weather readings are shown on hover.
# NOTE(review): df.info() reports `timestamp` as object dtype (strings), so the
# x-axis is probably treated as categorical rather than as dates -- confirm, and
# consider parsing the column if a true time axis is wanted.
fig = px.scatter(
    df,
    x='timestamp',
    y='Soil humidity 3',
    color='Air humidity (%)',
    hover_data=[
        'Irrigation field 3',
        'Air temperature (C)',
        'Pressure (KPa)',
        'Wind speed (Km/h)',
        'Wind gust (Km/h)',
        'Wind direction (Deg)',
    ],
    # A title lets this figure be told apart from the other per-field scatters.
    title='Soil humidity 3 by timestamp (colour: air humidity)',
)
fig.show()
In [15]:
# Columns for field 3: its soil-humidity reading, its irrigation column, and
# the weather readings.
sh3_columns = [
    'Soil humidity 3',
    'Irrigation field 3',
    'Air temperature (C)',
    'Air humidity (%)',
    'Pressure (KPa)',
    'Wind speed (Km/h)',
    'Wind gust (Km/h)',
    'Wind direction (Deg)',
]
sh3_df = df[sh3_columns]

# Preview the field-3 subset.
sh3_df.head()
Out[15]:
Soil humidity 3 Irrigation field 3 Air temperature (C) Air humidity (%) Pressure (KPa) Wind speed (Km/h) Wind gust (Km/h) Wind direction (Deg)
0 -1.56 1.0 19.52 55.04 101.50 2.13 6.30 225.00
1 -1.51 1.0 19.49 55.17 101.50 2.01 10.46 123.75
2 -1.47 1.0 19.47 55.30 101.51 1.90 14.63 22.50
3 -1.42 1.0 19.54 54.20 101.51 2.28 16.08 123.75
4 -1.38 1.0 19.61 53.09 101.51 2.66 17.52 225.00
In [16]:
# Pairwise correlations between all columns of the field-3 subset
# (pandas' default method, Pearson).
corr_matrix = sh3_df.corr()

# Rank every column by its correlation with soil humidity 3, largest first.
(
    corr_matrix
    .loc[:, "Soil humidity 3"]
    .sort_values(ascending=False)
)
Out[16]:
Soil humidity 3         1.000000
Wind direction (Deg)    0.150181
Pressure (KPa)          0.078811
Air temperature (C)    -0.015056
Wind gust (Km/h)       -0.054601
Air humidity (%)       -0.137482
Wind speed (Km/h)      -0.161307
Irrigation field 3     -0.222228
Name: Soil humidity 3, dtype: float64
In [17]:
# Scatter of soil humidity 4 against the timestamp column, coloured by air
# humidity; the irrigation and weather readings are shown on hover.
# NOTE(review): df.info() reports `timestamp` as object dtype (strings), so the
# x-axis is probably treated as categorical rather than as dates -- confirm, and
# consider parsing the column if a true time axis is wanted.
fig = px.scatter(
    df,
    x='timestamp',
    y='Soil humidity 4',
    color='Air humidity (%)',
    hover_data=[
        'Irrigation field 4',
        'Air temperature (C)',
        'Pressure (KPa)',
        'Wind speed (Km/h)',
        'Wind gust (Km/h)',
        'Wind direction (Deg)',
    ],
    # A title lets this figure be told apart from the other per-field scatters.
    title='Soil humidity 4 by timestamp (colour: air humidity)',
)
fig.show()
In [18]:
# Columns for field 4: its soil-humidity reading, its irrigation column, and
# the weather readings.
sh4_columns = [
    'Soil humidity 4',
    'Irrigation field 4',
    'Air temperature (C)',
    'Air humidity (%)',
    'Pressure (KPa)',
    'Wind speed (Km/h)',
    'Wind gust (Km/h)',
    'Wind direction (Deg)',
]
sh4_df = df[sh4_columns]

# Preview the field-4 subset.
sh4_df.head()
Out[18]:
Soil humidity 4 Irrigation field 4 Air temperature (C) Air humidity (%) Pressure (KPa) Wind speed (Km/h) Wind gust (Km/h) Wind direction (Deg)
0 26.57 1.0 19.52 55.04 101.50 2.13 6.30 225.00
1 26.58 1.0 19.49 55.17 101.50 2.01 10.46 123.75
2 26.59 1.0 19.47 55.30 101.51 1.90 14.63 22.50
3 26.61 1.0 19.54 54.20 101.51 2.28 16.08 123.75
4 26.62 1.0 19.61 53.09 101.51 2.66 17.52 225.00
In [19]:
# Pairwise correlations between all columns of the field-4 subset
# (pandas' default method, Pearson).
corr_matrix = sh4_df.corr()

# Rank every column by its correlation with soil humidity 4, largest first.
(
    corr_matrix
    .loc[:, "Soil humidity 4"]
    .sort_values(ascending=False)
)
Out[19]:
Soil humidity 4         1.000000
Irrigation field 4      0.210986
Wind direction (Deg)    0.062345
Air humidity (%)       -0.035806
Wind gust (Km/h)       -0.049485
Pressure (KPa)         -0.050020
Air temperature (C)    -0.065449
Wind speed (Km/h)      -0.082555
Name: Soil humidity 4, dtype: float64
In [ ]: